CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
  [(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)]
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]
  [
   [ROW FORMAT row_format]
   [STORED AS file_format]
     | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
  [AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
CREATE TABLE emp2 LIKE emp;               -- copies only the table structure
CREATE TABLE emp3 AS SELECT * FROM emp;   -- copies both the table structure and the data (CTAS)
Converting a managed (internal) table to an external table
The statement to convert a managed table to an external table is: ALTER TABLE emp SET TBLPROPERTIES('EXTERNAL'='TRUE');
Note: EXTERNAL and TRUE above must be uppercase keywords.
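A minimal sketch of the round trip, assuming the reverse conversion uses 'FALSE' with the same case-sensitivity caveat; DESCRIBE FORMATTED lets you verify the Table Type field:

ALTER TABLE emp SET TBLPROPERTIES('EXTERNAL'='TRUE');
DESCRIBE FORMATTED emp;   -- Table Type should now read EXTERNAL_TABLE
ALTER TABLE emp SET TBLPROPERTIES('EXTERNAL'='FALSE');
DESCRIBE FORMATTED emp;   -- Table Type should read MANAGED_TABLE again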
CREATE TABLE bucket_table(
  id int,
  name string,
  age string
)
CLUSTERED BY (id) SORTED BY (id) INTO 4 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
load data local inpath '/home/hadoop/data/bucket.txt' into table bucket_table;
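One caveat worth flagging: LOAD DATA only copies files into the table directory and does not split rows into buckets. To get data physically bucketed, write it through a query; before Hive 2.0 this also required enabling hive.enforce.bucketing. A sketch, assuming a hypothetical staging table stage_bucket with the same columns:

set hive.enforce.bucketing = true;   -- implicit (and removed) in Hive 2.0+
INSERT OVERWRITE TABLE bucket_table
SELECT id, name, age FROM stage_bucket;   -- stage_bucket is a hypothetical staging table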
select * from bucket_table tablesample (bucket 1 out of 2 on id);
id    name    age
2     tom2    23
4     tom4    24
6     tom6    26
8     tom8    28
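TABLESAMPLE(BUCKET x OUT OF y) reads buckets x, x+y, x+2y, ... of the table's buckets. Here, bucket 1 out of 2 over 4 buckets reads buckets 1 and 3, which (with hash(id) = id) hold the rows where id % 4 is 0 or 2, hence the even ids above. A sketch of sampling one specific bucket:

select * from bucket_table tablesample (bucket 2 out of 4 on id);   -- reads only bucket 2: rows with id % 4 == 1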
You can use SORTED BY (id) to create a bucketed table that is also sorted within each bucket:
CREATE TABLE bucket_sort(
  id int,
  name string,
  age string
)
CLUSTERED BY (id) SORTED BY (id) INTO 4 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
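As with bucketing, SORTED BY is only honored for data written through a query; before Hive 2.0 the sort additionally had to be enabled. A sketch, again assuming the hypothetical staging table stage_bucket:

set hive.enforce.sorting = true;     -- implicit (and removed) in Hive 2.0+
INSERT OVERWRITE TABLE bucket_sort
SELECT id, name, age FROM stage_bucket;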
none: Disable hive.fetch.task.conversion (value added in Hive 0.14.0 with HIVE-8389)
When set to none, every query goes through MapReduce.
minimal: SELECT *, FILTER on partition columns (WHERE and HAVING clauses), LIMIT only
When set to minimal, these query shapes (SELECT *, filters on partition columns in the WHERE and HAVING clauses, LIMIT only) are converted to fetch tasks and skip MapReduce; everything else still goes through MR.
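A quick way to see the difference in a session, using the emp table from above (whether a given query qualifies under minimal also depends on its filters and partitioning):

set hive.fetch.task.conversion=none;
select * from emp limit 5;     -- launches a MapReduce job
set hive.fetch.task.conversion=minimal;
select * from emp limit 5;     -- runs as a local fetch task, no MapReduce job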
hive (bigdata_hive)> INSERT OVERWRITE LOCAL DIRECTORY '/home/hadoop/tmp/emp.txt' SELECT * FROM emp;
FAILED: RuntimeException org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hdfs.server.namenode.SafeModeException): Cannot create directory /tmp/hive/hadoop/25894946-03c1-4547-a249-1a01f8b0e606/hive_2019-12-16_22-19-09_624_6134799912934093237-1. Name node is in safe mode. Resources are low on NN. Please add or free up more resources then turn off safe mode manually. NOTE: If you turn off safe mode before adding resources, the NN will immediately return to safe mode. Use "hdfs dfsadmin -safemode leave" to turn safe mode off.
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkNameNodeSafeMode(FSNamesystem.java:1529)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirsInt(FSNamesystem.java:4530)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:4505)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:884)
    at org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.mkdirs(AuthorizationProviderProxyClientProtocol.java:328)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:641)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2278)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2274)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2272)
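As the message itself says, the NameNode is in safe mode because resources on the NN are low. After freeing up space, safe mode can be turned off with the command the log quotes:

hdfs dfsadmin -safemode get     # check the current safe mode status
hdfs dfsadmin -safemode leave   # turn safe mode off, then rerun the INSERT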